package com.stu.jsoup.nw.wx;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.text.StringEscapeUtils;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import org.junit.jupiter.api.Test;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Paths;
import java.util.Arrays;

/**
 * Ad-hoc scraping helpers, written as JUnit tests, that fetch content from xueqiu.com with jsoup.
 */
public class GetXueQiu {
    /** Maximum number of characters of the page title that are kept in the saved file name. */
    private static final int MAX_TITLE_LENGTH = 16;

    /**
     * Downloads each listed page and writes its HTML, UTF-8 encoded, to a file in the output directory.
     *
     * <p>The file name is the last path segment of the URL followed by the (truncated, sanitised)
     * page title, with an {@code .html} extension.
     *
     * @throws Exception if a page cannot be fetched or the file cannot be written
     */
    @Test
    public void get() throws Exception {
        String path = "D:\\";
        for (String ur : Arrays.asList("https://xueqiu.com/5389609232/209863782")) {
            Connection.Response response = Jsoup.connect(ur).method(Connection.Method.GET).execute();
            String htmlBody = response.body();
            Document doc = Jsoup.parse(htmlBody);

            // The page uses protocol-relative links ("//host/..."); once the HTML is saved and opened
            // from disk they would resolve against the local file protocol, so pin them to https.
            // See https://blog.csdn.net/weixin_42508745/article/details/82628353
            htmlBody = htmlBody.replace("href=\"//", "href=\"https://");
            htmlBody = htmlBody.replace("src=\"//", "src=\"https://");

            Elements otherTitle = doc.getElementsByTag("title");
            String fileName = buildFileName(ur, otherTitle.text());
            FileUtils.writeStringToFile(Paths.get(path, fileName + ".html").toFile(), htmlBody, StandardCharsets.UTF_8);
        }
    }

    /**
     * Builds the output file name (without extension) for a downloaded page.
     *
     * @param ur       the page URL; its base name is used as the file-name prefix
     * @param rawTitle the text of the page's {@code <title>} element(s), possibly empty
     * @return the URL base name followed by at most {@link #MAX_TITLE_LENGTH} characters of the
     *         unescaped title, with characters that may be rejected in file names replaced
     */
    private static String buildFileName(String ur, String rawTitle) {
        String title = StringEscapeUtils.unescapeHtml4(rawTitle);
        // Clamp the cut-off: a title shorter than the limit (or an empty one) must not make
        // substring() throw StringIndexOutOfBoundsException.
        title = title.substring(0, Math.min(MAX_TITLE_LENGTH, title.length()));
        // Replace characters in the title because a file with such a name might not be creatable.
        String safeTitle = title.replaceAll("[/\\\\:*?|]", "_").replaceAll("[\"<>]", "'");
        return FilenameUtils.getBaseName(ur) + safeTitle;
    }

    /**
     * Requests the status-search JSON endpoint and prints the raw response body to standard output.
     *
     * @throws IOException if the request fails
     */
    @Test
    public void getComment() throws IOException {
        String url = "https://xueqiu.com/query/v1/symbol/search/status.json?count=10&comment=0&symbol=SZ000876&hl=0&source=user&sort=time&page=1&q=&type=11";
        Connection.Response response = Jsoup.connect(url)
                .method(Connection.Method.GET)
                .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.74 Safari/537.36 Edg/99.0.1150.55")
                // The endpoint does not return HTML; without this jsoup rejects the content type.
                .ignoreContentType(true)
                .execute();
        String htmlBody = response.body();
        System.out.println(htmlBody);
    }
}
